\ Portable, Stack Based String Library for TurboForth V1.2
\ Version 1.0 - Mark Wills February 2014.

base @ \ save systems' current number base
decimal

application: strLib

256 \ maximum string stack size in bytes.
\ Adjust to your own needs. Choose a value that is a multiple 
\ of your systems' cell size.
constant ($sSize)          \ store stack size
here ($sSize) allot        \ reserve space for string stack 
constant ($sEnd)           \ bottom of string stack
variable ($sp)             \ pointer to top of string stack
($sEnd) ($sSize) + ($sp) ! \ initialise it
variable ($depth)          \ count of items on the string stack
variable ($temp0)          \ reserved for internal use
variable ($temp1)          \ reserved for internal use
variable ($temp2)          \ reserved for internal use
variable ($temp3)          \ reserved for internal use

\ Throw codes used by this library:
: (throw) ( code -- )
    case 
        9900 of ." String stack underflow" endof
        9901 of ." String too large to assign" endof 
        9902 of ." String stack is empty" endof 
        9903 of ." Need at least 2 strings on string stack" endof
        9904 of ." String too large for string constant" endof 
        9905 of ." Illegal LEN value" endof 
        9906 of ." Need at least 3 strings on string stack" endof 
        9907 of ." String is not a legal number" endof
        9908 of ." Illegal start value" endof
    endcase
    cr abort ;

: ($depth+) ( -- )
    \ increments the string stack item count
    1 ($depth) +! ;

: ($sp@) ( -- addr ) ($sp) @ ;

: ($rUp) ( n -- n|n+1)
    \ rounds n up to the next even value
    1+ -2 and ;

: cell+ ( n -- n+2) compile 2+ ; immediate

: (sizeOf$) ( $addr - $size)
    \ given an address of a transient string, compute the stack
    \ size in bytes required to hold it, rounded up to the
    \ nearest even cell size, and including the length cell.
    @ ($rUp) cell+ ;

: (set$SP) ( $size -- ) 
    \ given the stack size of a transient string set the string
    \ stack pointer to the new address required to accomodate it.
    negate dup ($sp@) + ($sEnd) < if 9900 (throw) then 
    ($sp) +! ;
    
: (addrOf$) ( index -- addr )
    \ given an index into the string stack, return the start
    \ address of the string. addr points to the length cell.
    \ topmost string is index 0
    \ next string is index 1 and so on
    ($sp@) swap dup if 0 do 
        dup (sizeOf$) + loop else drop then ;
    
: (lenOf$) ( $addr -- len )
    \ given the address of a transient string on the string
    \ stack (the address of the length cell), return the length
    \ of the string.
    state @ if compile @ else @ then ; immediate
    
: reset$ ( -- ) ( "reset i.e. empty the string stack")
    0 ($depth) !  ($sEnd) ($sSize) + ($sp) ! ;

: depth$ ( -- $sDepth) \ "depth of string stack"
    \ returns the current depth of the string stack.
    ($depth) @ ;

: $const ( max_len tib:"name" -- ) ( runtime: -- $Caddr) \ "string constant"
    \ creates a string constant
    \ when name is referenced the address of the max_len field 
    \ is pushed to the stack.
    \ e.g. 100 string fred \ create a string called fred 
    create  dup ( max_len) , ( actual_len) 0 ,  allot align ;
    
: clen$ ( $Caddr -- len ) \ "string constant length"
    \ given the address of a string constant, returns its 
    \ length.
    cell+ @ ;
    
: maxLen$ ( $Caddr -- max_len ) \ "string constant maximum length"
    \ given the address of a string constant, returns its 
    \ maximum length
    (lenOf$) ;

: .$const ( $Caddr -- ) \ "display string constant"
    \ displays the string constant. e.g. fred .$const
    cell+ dup (lenOf$) swap cell+ swap type ;
    
: :=" ( $Caddr tib:"string" -- ) \ "assign string constant"
    \ assigns the string "string" to the string constant
    \ e.g. fred :=" hello mother!"
    dup @ ascii " word swap >r  2dup < if 9901 (throw) then
    nip 2dup swap cell+ !
    >r [ 2 cells ] literal + r> r> -rot cmove ;

: ($") ( addr len -- ) ( ss: -- str )
    \ run-time action for $" (see below)
    dup ($rUp) cell+ (set$SP)
    dup ($sp@) !  ($sp@) cell+ swap cmove  ($depth+) ;

: $" ( tib:"string" -- ) ( ss: -- str) \ "string to string stack"
    \ pushes a string directly to the string stack
    \ e.g. $" hello world" .$
    [compile] s"  state @ if compile ($") else ($") then ; immediate 

: >$ ( $Caddr -- ) ( ss: -- str) \ "string constant to string stack"
    \ moves a string constant to the string stack
    \ e.g. fred >$
    cell+ dup (lenOf$) swap cell+ swap ($") ;

: pick$ ( n -- ) ( ss: -- strN) \ "pick string"
    \ given an index into the string stack, copy the indexed
    \ string to the top of the string stack.
    \ 0 $pick is equivalent to $DUP
    \ 1 $pick is equivalent to $OVER etc.
    depth$ 0= if 9902 (throw) then 
    (addrOf$) dup (lenOf$) swap cell+ swap ($") ;

: dup$ ( -- ) ( ss: s1 -- s1 s1) \ "duplicate string"
    \ duplicates a string on the string stack
    depth$ 0= if 9902 (throw) then  0 pick$ ;

: drop$ ( -- ) ( ss: str -- ) \ "drop string"
    \ drops the top string from the string stack
    depth$ 0= if 9902 (throw) then
    ($sp@) (sizeOf$) negate (set$SP)   -1 ($depth) +! ;
    
: swap$ ( -- ) ( ss: s1 s2 -- s2 s1) \ "swap strings"
    \ swaps the top two string items on the string stack
    depth$ 2 < if 9903 (throw) then 
    ($sp@) dup (sizeOf$) here swap cmove
    1 (addrOf$) dup (sizeOf$) ($sp@) swap cmove
    here dup (sizeOf$)  ($sp@) dup (sizeOf$) + swap cmove ;

: nip$ ( -- ) ( ss: s1 s2 -- s2) \ "nip strings"
    \ remove the string under the top string
    depth$ 2 < if 9903 (throw) then  swap$ drop$ ;
    
: over$ ( -- ) ( ss: s1 s2 -- s1 s2 s1) \ "over string"
    \ move a copy of s1 to top of string stack
    depth$ 2 < if 9903 (throw) then  1 pick$ ;
    
: (rot$) ( -- ) ( ss: s6 s5 s4 s3 s2 s1 -- s3 s2 s1)
    ( internal factor of rot$ and -rot$. See below. )
    ( source:) ($sp@)  ( destination:) 3 (addrOf$)
    ( #bytes to move: ) 
    ($sp@) (sizeOf$)   1 (addrOf$) (sizeOf$)   2 (addrOf$) (sizeOf$) + + 
    ( move s1 to s3 into the space occupied by s4 to s6:) CMOVE
    ( adjust string stack pointer:) 3 (addrOf$) ($sp) !  -3 ($depth) +! ;

: rot$ ( -- ) ( ss: s3 s2 s1 -- s2 s1 s3) \ "string rotate left"
    \ rotates the top three strings to the left.
    depth$ 3 < if 9906 (throw) then 
    1 pick$  1 pick$  4 pick$ (rot$) ;

: -rot$ ( -- ) ( ss: s3 s2 s1 -- s1 s3 s2) \ "string rotate right"
    \ rotates the top three strings to the right.
    depth$ 3 < if 9906 (throw) then
    0 pick$  3 pick$  3 pick$ (rot$) ;
    
: len$ ( -- len ) ( ss: -- ) \ "length of string"
    \ returns the length of the topmost string.
    depth$ 1 < if 9902 (throw) then  ($sp@) @ ;    

: >$const ( $Caddr -- ) ( ss: str -- ) \ "to string constant"
    \ move top of string stack to the string constant
    \ e.g. $" blue" fred >$const  fred .$const 
    \ displays "blue"    
    >r  depth$ 1 < if 9902 (throw) then
    len$ r@ @ > if 9904 (throw) then
    ($sp@) dup (sizeOf$) r> cell+ swap cmove drop$ ;

: +$ ( -- ) ( ss: s1 s2 -- s2+s1) \ "concatenate strings"
    \ replaces the top most two strings on the string stack
    \ with their concatenated equivalent.
    \ eg: $" red" $" blue" $& .$
    \ displays "redblue"
    depth$ 2 < if 9903 (throw) then 
    1 (addrof$) cell+  here   1 (addrof$) (lenof$)  cmove
    ($sp@) cell+   1 (addrof$) (lenof$) here +  len$ cmove
    here len$ 1 (addrof$) (lenof$) +  drop$ drop$  ($") ;    

: mid$ ( start len -- ) ( ss: str1 -- str1 str2) \ "mid string"
    \ the characters from start to start+len are pushed to the string stack 
    \ as a new string. the original string is retained.
    depth$ 1 < if 9902 (throw) then 
    dup len$ >  over 1 < or  if 9905 (throw) then
    over dup len$ >  swap 0< or if 9908 (throw) then 
    swap ($sp@) cell+ +  swap  ($") ;

: left$ ( len -- ) ( ss: str1 -- str1 str2) \ "left string"
    \ the leftmost len characters are pushed to  the string 
    \ stack as a new string. The original string is retained.
    depth$ 1 < if 9902 (throw) then 
    dup len$ > over 1 < or if 9905 (throw) then 
    0 ($sp@) cell+ +  swap  ($") ;
   
: right$ ( len -- ) ( ss: str1 -- str1 str2) \ "right string"
    \ the rightmost len characters, pushed to the string stack
    \ as a new string. the original string is retained.
    depth$ 1 < if 9902 (throw) then 
    dup len$ > over 1 < or if 9905 (throw) then 
    ($sp@) (lenOf$) over - ($sp@) cell+ +  swap  ($") ;

: findc$ ( char -- pos|-1 ) ( ss: -- ) ( "find char")
    ( returns the first occurance of the character char in )
    ( the top string. The string is retained. )
    ( returns -1 if the char is not found )
    depth$ 1 < if 9902 (throw) then
    -1 ($temp0) ! ( assume not found )
    ($sp@) cell+  ($sp@) (lenOf$) 0 do
        dup c@ 2 pick = if i ($temp0) ! leave then 1+ loop
    drop drop ($temp0) @ ;

: find$ ( offset -- pos|-1 ) ( ss: s1 s2 -- s1) \ "find string"
    \ searches string str1, beginning at offset, for the substring str2.
    \ if the string is found, returns the position of the string relative
    \ to the offset, otherwise returns -1.
    depth$ 2 < if 9903 (throw) then 
    len$ ($temp1) !    1 (addrOf$) (lenOf$) ($temp0) !
    dup ($temp0) @ > if drop -1 exit then 
    1 (addrOf$) cell+ + ($temp2) !    ($sp@) cell+ ($temp3) !
    ($temp1) @ ($temp0) @ > if drop -1 exit then 
    0  ($temp0) @ 0 do
        ($temp3) @ over + c@ 
        ($temp2) @ i + c@ = if
            1+ dup ($temp1) @ = if 
                drop i ($temp1) @ - 1+   -2 leave then 
        else drop 0 then
    loop 
    dup -2 = if drop else drop -1 then drop$ ;

: .$ ( -- ) ( ss: str -- ) \ "display string"
    \ pop and display string from string stack
    depth$ 0= if 9902 (throw) then 
    ($sp@) cell+ ($sp@) (lenOf$) type  drop$ ;
    
: rev$ ( -- ) ( ss: s1 -- s2 ) \ "reverse string"
    \ reverse top string on string stack.
    depth$ 0= if 9902 (throw) then 
    ($sp@) dup cell+ >r  (lenOf$)  r> swap here swap cmove 
    ($sp@) (lenOf$) here 1- +
    ($sp@) cell+  dup ($sp@) (lenOf$) +   swap do
        dup c@ i c!  1- loop  drop ;

: ltrim$ ( -- ) ( ss: s1 -- s2 ) \ "left trim string"
    \ removes leading spaces from s1, resulting in s2.
    depth$ 0= if 9902 (throw) then  
    ($sp@) dup (lenOf$) >r  here over (sizeOf$)  cmove
    0  r> here cell+ dup >r +  r> do
        i c@ bl = if 1+ else leave then loop 
    dup 0> if 
        >r  ($sp@) (lenOf$)  drop$
        here cell+ r@ +  swap r> -  ($")
    else drop then ;

: rtrim$ ( -- ) ( ss: s1 -- s2 ) \ "right trim string"
    \ removes trailing spaces from s1, resulting in s2.
    depth$ 0= if 9902 (throw) then  rev$ ltrim$ rev$ ;

: trim$ ( -- ) ( ss: s1 -- s2 ) \ "trim string"
    \ remove both leading and trailing spaces from s1, 
    \ resulting in s2.
    rtrim$ ltrim$ ;

: replace$ ( -- pos ) \ "replace string"
    \ ( found: ss: s1 s2 s3 -- s4  not found: s1 s2 -- s1 s2)
    depth$ 3 < if 9906 (throw) then
    len$ >r
    0 find$ dup ($temp0) ! -1 > if
        ($sp@) cell+  here  ($temp0) @ cmove  
        1 (addrOf$) cell+   here ($temp0) @ +  
        1 (addrOf$) (lenof$) cmove
        ($sp@) cell+ ($temp0) @ + r@ +    
        here ($temp0) @ + 1 (addrOf$) (lenof$) +
        len$ r> - ($temp0) @ -  dup >r  cmove
        r> ($temp0) @ + 1 (addrOf$) (lenof$) +
        drop$ drop$ here swap ($")
    else r> drop ($temp0) @ then ;

: ucase$ ( -- ) ( ss: str -- STR) \ "to upper case"
    \ on the topmost string, converts all lower case characters
    \ to upper case.
    depth$ 1 < if 9902 (throw) then
    ($sp@) dup (lenOf$) + cell+  ($sp@) cell+  do
       i c@ dup [ char a ] literal  [ char { ] literal within if 
            32 -  i c! else drop then loop ;

: lcase$ ( -- ) ( ss: STR -- str) \ "to lower case"
    \ on the topmost string, converts all upper case characters
    \ to lower case.
    depth$ 1 < if 9902 (throw) then 
    ($sp@) dup (lenOf$) + cell+  ($sp@) cell+  do
       i c@ dup [ char A ] literal  [ char [ ] literal within if 
            32 +  i c! else drop then loop ;

: ==$? ( -- flag ) ( ss: -- ) \ "are strings equal?"
    \ performs a case-sensitive comparison of the topmost 
    \ two strings on the string stack, returning true if their 
    \ length and contents are identical, otherwise returning 
    \ false.
    depth$ 2 < if 9903 (throw) then 
    len$  1 (addrOf$) (lenOf$) = if
        1 (addrOf$) cell+ \ point to first char of string 1
        ($sp@) cell+  dup len$ + swap  do
            dup c@  i c@  <> if drop false leave then 1+ loop
        dup if drop true then 
    else false then ;
   
: val$ ( -- ud ) ( ss: str -- ) \ "value of string"
    \ interprets the topmost string as an integer number, returning its
    \ value on the data stack as an integer.
    \ Note that a string value can be converted to a double by pre-pending
    \ the number with a period. E.g. $" .9900" VAL$ 
    ($sp@) dup (lenOf$) swap cell+ swap 
    number if 9907 (throw) then drop$ ;

: $.s ( -- ) ( ss: -- ) \ "display string stack"
    cr  depth$ 0> if
        ($sp@)  depth$
        ."  Index|Length|String" cr
        ." ------+------+------" cr 
        0 begin
            depth$ 0> while
                dup 5 .r ." |" len$ 5 .r  ." |" .$  1+ cr
        repeat  drop
        ($depth) !  ($sp) !  cr
    else
        ." String stack is empty." cr
    then
    ." Allocated stack space:"
    ($sEnd) ($sSize) + ($sp@) - 4 .r ."  bytes" cr
    ."     Total stack space:"
    ($sSize) 4 .r ."  bytes" cr
    ." Stack space remaining:" 
    ($sp@) ($sEnd) - 4 .r ."  bytes" cr ; entry-point
    
: (sgn) ( n -- -1|0|+1 ) dup abs / ; 

: cmp$  ( -- -1|0|+1 )  ( ss: -- )  ( "compare strings" )
    ( performs a case-sensitive comparison of the topmost )
    ( two strings [s1, s2] on the string stack, returning )
    ( -1 if s1 < s2, 0 if s1 = s2 or +1 if s1 > s2 )
    ( LES: Ported from fbForth's SCMP )
    depth$ 2 < if 9903 (throw) then 
    1 (addrof$) cell+  ($sp@) cell+  ( get 1st char addresses of s1 & s2)
    ( get string lengths, copy, take diff; get sign to return stack)
    1 (addrof$) (lenof$) len$ over over - (sgn) >r
    ( get min strlen for limit; 0 flag start; swap with limit)
    min 0 swap 0 do  
        drop                ( drop 0 flag)
        over i + c@         ( get next char of str1)
        over i + c@ - (sgn) ( get next char of str2, take diff; get sign)
        dup if leave then   ( dup it and leave loop if not 0)
    loop
    ( pop cnt-diff sign from return stack; copy loop result; see if 0)
    r> over 0=      
    if              ( it's 0, so or with cnt diff for final answer)
        or
    else            ( it's not 0, so drop cnt-diff sign )
        drop
    then
    -rot drop drop  ( get rid of leftover str1 and str2 pointers)
; 

expose reset$   expose depth$   expose $const   expose clen$
expose maxLen$  expose .$const  expose :="      expose $"
expose >$       expose pick$    expose dup$     expose drop$
expose swap$    expose nip$     expose over$    expose rot$
expose -rot$    expose len$     expose >$const  expose +$
expose mid$     expose left$    expose right$   expose findc$
expose find$    expose .$       expose rev$     expose ltrim$
expose rtrim$   expose trim$    expose replace$ expose ucase$
expose lcase$   expose ==$?     expose val$     expose $.s
expose cmp$

end-application

base ! \ restore systems' current number base

$" RED" $" GREEN" $" BLUE" $.S
